{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第0讲：Pandas简介\n",
    "\n",
    "+ Python第三方库，提供高性能易用数据类型和分析工具\n",
    "+ 引用方法：import pandas as pd\n",
    "+ Pandas基于NumPy实现，常与NumPy和Matplotlib一同使用\n",
    "\n",
    "---\n",
    "两个数据类型：Series(一维数据）, DataFrame（高维数据)\n",
    "\n",
    "基本操作、运算操作、特征类操作、关联类操作\n",
    "\n",
    "---\n",
    "**和NumPy的比较**\n",
    "+ NumPy关注数据的结构表达 Pandas关注数据的应用表达\n",
    "+ NumPy提供基础数据类型，Pandas提供扩展数据类型\n",
    "+ NumPy考虑数据间关系 Pandas考虑数据与索引间关系"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0    0\n",
       " 1    1\n",
       " 2    2\n",
       " 3    3\n",
       " 4    4\n",
       " dtype: int64,\n",
       " 0     0\n",
       " 1     1\n",
       " 2     3\n",
       " 3     6\n",
       " 4    10\n",
       " dtype: int64)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#一个例子\n",
    "import pandas as pd\n",
    "d = pd.Series(range(5))\n",
    "d,d.cumsum()#d 以及 d的前n项和"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "# 第1节：Series类型\n",
    "\n",
    "Series类型由一组【数据及索引】组成\n",
    "\n",
    "简单来说就是【一个索引index对应一个数据data】"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    9\n",
       "1    8\n",
       "2    7\n",
       "3    6\n",
       "dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d = pd.Series([9,8,7,6])\n",
    "d \n",
    "# 注意看左侧的0 1 2 3是自动索引\n",
    "# dtype 沿用NumPy的数据类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "myindex1    9\n",
       "myindex2    8\n",
       "myindex3    7\n",
       "myindex4    6\n",
       "dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 【如果需要】可以自己设置index\n",
    "d = pd.Series([9,8,7,6],index = ['myindex1','myindex2','myindex3','myindex4'])\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "myindex1    str\n",
       "myindex2    str\n",
       "myindex3    str\n",
       "myindex4    str\n",
       "dtype: object"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 【创建Series类型】方法1：从标量创建\n",
    "# 这个方法【不可以】省略index\n",
    "d = pd.Series('str' ,index = ['myindex1','myindex2','myindex3','myindex4'])\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    1.0\n",
       "b    2.0\n",
       "c    3.0\n",
       "d    NaN\n",
       "1    5.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 方法2：从字典创建\n",
    "# 由于字典也是【index-data】结构，可以从字典创建Series\n",
    "\n",
    "dir = {'a': 1, 'b': 2, 'c': 3, 1: 5}\n",
    "index = ['a', 'b', 'c', 'd', 1]\n",
    "\n",
    "pd.Series(dir, index) # Series自己去字典找对应index的值，如果找不到就是NaN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6    0.141522\n",
       "7    0.480137\n",
       "8    0.206698\n",
       "9    0.304436\n",
       "dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 方法3：从 NumPy 的 ndarray 类型创建\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# data  = np.arange(1,5)\n",
    "data = np.random.rand(4)\n",
    "index = np.arange(6,10)\n",
    "\n",
    "pd.Series(data, index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7   -1\n",
       "3   -5\n",
       "4    6\n",
       "6    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 方法4：Python自带列表\n",
    "data = [-1,-5,6,2]\n",
    "index = [7,3,4,6]\n",
    "\n",
    "pd.Series(data, index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    0\n",
       "3    1\n",
       "5    2\n",
       "7    3\n",
       "9    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 方法5：Pandas自带函数\n",
    "pd.Series(range(5),index = [1,3,5,7,9])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([1, 3, 5, 7], dtype=int64), Index(['a', 'b', 'c', 'd'], dtype='object'))"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#【Series类型的操作】\n",
    "\n",
    "data  = [1,3,5,7]\n",
    "index = ['a','b','c','d']\n",
    "\n",
    "d = pd.Series(data, index)\n",
    "d.values,d.index\n",
    "# .index 获得索引\n",
    "# .values 获得数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1, 1)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d['a'],d[0] #系统自动给的索引 和 手动给的索引 都是存在并且可以使用的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    1\n",
       "c    5\n",
       "d    7\n",
       "dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d[['a','c','d']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 但是不要把二者混用\n",
    "# d[['a',1,3]] 这行代码会报错"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(7,\n",
       " a    1\n",
       " b    3\n",
       " c    5\n",
       " dtype: int64,\n",
       " a       2.718282\n",
       " b      20.085537\n",
       " c     148.413159\n",
       " d    1096.633158\n",
       " dtype: float64)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 【Series基本操作】\n",
    "\n",
    "data  = [1,3,5,7]\n",
    "index = ['a','b','c','d']\n",
    "\n",
    "b = pd.Series(data, index)\n",
    "\n",
    "a1 = b[3] #索引\n",
    "\n",
    "a2 = b[:3] #切片\n",
    "\n",
    "a3 = np.exp(b)#numpy可以直接操作\n",
    "\n",
    "a1,a2,a3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 【Series类型的操作类似Python字典类型】\n",
    "\n",
    "# 通过自定义索引访问\n",
    "\n",
    "# 保留字in操作：对index采用in方法\n",
    "data  = [1,3,5,7]\n",
    "index = ['a','b','c','d']\n",
    "\n",
    "b = pd.Series(data, index)\n",
    "'a' in b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(7, -1)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用.get()方法 dict.get(key, default=None)\n",
    "# 如果key的值存在，返回key的值，否则返回默认参数default的值\n",
    "b.get('d',-1),b.get('x',-1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "### 以上讲解了创建和访问Series对象的基本方法\n",
    "### 接下来看一下Series的高级用法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(a    1\n",
       " b    3\n",
       " c    5\n",
       " d    7\n",
       " dtype: int64,\n",
       " c    2\n",
       " d    4\n",
       " e    6\n",
       " f    8\n",
       " dtype: int64)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series类型在运算中会【自动对齐】不同索引的数据\n",
    "data  = [1,3,5,7]\n",
    "index = ['a','b','c','d']\n",
    "\n",
    "a = pd.Series(data, index)\n",
    "\n",
    "data1  = [2,4,6,8]\n",
    "index1 = ['c','d','e','f']\n",
    "\n",
    "b = pd.Series(data1, index1)\n",
    "a,b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a     NaN\n",
       "b     NaN\n",
       "c     7.0\n",
       "d    11.0\n",
       "e     NaN\n",
       "f     NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a+b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "索引列\n",
       "a    1\n",
       "b    3\n",
       "c    5\n",
       "d    7\n",
       "Name: 一个Series数组, dtype: int64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series对象和索引都可以有一个【名字】，存储在属性.name中\n",
    "data  = [1,3,5,7]\n",
    "index = ['a','b','c','d']\n",
    "\n",
    "b = pd.Series(data, index)\n",
    "b.name = '一个Series数组'\n",
    "b.index.name = '索引列'\n",
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    100\n",
       "b      3\n",
       "c      5\n",
       "d      7\n",
       "Name: Name100, dtype: int64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 【随时修改并即刻生效】\n",
    "\n",
    "data  = [1,3,5,7]\n",
    "index = ['a','b','c','d']\n",
    "\n",
    "b = pd.Series(data, index)\n",
    "b.name = 'Name1'\n",
    "\n",
    "#修改Series\n",
    "b['a'] = 100\n",
    "b.name = 'Name100'\n",
    "b#可以看到即可生效了"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 总结\n",
    "Series是一维带“标签”数组（标签即index）\n",
    "\n",
    "它的用法基本上和List还有ndarray的一维数组类似\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "# 第2节：DataFrame类型\n",
    "\n",
    "处理二维/多维数据\n",
    "\n",
    "最简单、最常用的情况：二维表格\n",
    "# 定义\n",
    "![avator](./image/day4-img1.PNG)\n",
    "\n",
    "---\n",
    "# 相关\n",
    "![avator](./image/day4-img2.PNG)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0  1  2  3  4\n",
       "0  0  1  2  3  4\n",
       "1  5  6  7  8  9"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#【创建DataFrame】方法1：从二维ndarray对象创建\n",
    "data = np.arange(10).reshape(2,5)\n",
    "d = pd.DataFrame(data)\n",
    "d # 注意到自动生成了【行、列索引】"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1.0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>2.0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>3.0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two\n",
       "a  1.0    9\n",
       "b  2.0    8\n",
       "c  3.0    7\n",
       "d  NaN    6"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "# 方法2：由一维ndarray、列表、字典、元组或Series构成的 字典\n",
    "\n",
    "# ndarray\n",
    "data = {\n",
    "    'one': np.arange(0, 5),\n",
    "    'two': np.arange(6, 11)\n",
    "}\n",
    "\n",
    "# 列表\n",
    "data = {\n",
    "    'one': [1,3,5,7],\n",
    "    'two': ['a','w','e','r']\n",
    "}\n",
    "\n",
    "#Series\n",
    "data = {\n",
    "    'one':pd.Series([1,2,3],index = ['a','b','c']),\n",
    "    'two':pd.Series([9,8,7,6],index = ['a','b','c','d'])\n",
    "}\n",
    "\n",
    "d = pd.DataFrame(data)\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>7</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   two three\n",
       "b    8   NaN\n",
       "c    7   NaN\n",
       "d    6   NaN"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 【数据根据行列索引自动补齐】\n",
    "\n",
    "data = {\n",
    "    'one':pd.Series([1,2,3],index = ['a','b','c']),\n",
    "    'two':pd.Series([9,8,7,6],index = ['a','b','c','d'])\n",
    "}\n",
    "d = pd.DataFrame(data,index = ['b','c','d'],columns = ['two','three'])\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Index(['b', 'c', 'd'], dtype='object'),\n",
       " array([[8, nan],\n",
       "        [7, nan],\n",
       "        [6, nan]], dtype=object),\n",
       " Index(['two', 'three'], dtype='object'))"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d.index,d.values,d.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    8\n",
       "c    7\n",
       "d    6\n",
       "Name: two, dtype: int64"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d['two']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# d.ix['c']\n",
    "# ix函数（0.20.0版本后已经弃用）\n",
    "\n",
    "# DataFrame基本操作类似Series，依据行列索引"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "# Pandas的数据类型操作\n",
    "改变Series和DataFrame对象"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>columns1</th>\n",
       "      <th>columns2</th>\n",
       "      <th>columns3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>row1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row3</th>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row4</th>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      columns1  columns2  columns3\n",
       "row1         1         2         3\n",
       "row2         2         4         9\n",
       "row3         3         6        27\n",
       "row4         4         8        81"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = {\n",
    "    'columns1':[1,2,3,4],\n",
    "    'columns2':[2,4,6,8],\n",
    "    'columns3':[3,9,27,81]\n",
    "}\n",
    "\n",
    "d = pd.DataFrame(data,index = ['row1','row2','row3','row4'])\n",
    "\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>columns1</th>\n",
       "      <th>columns2</th>\n",
       "      <th>columns3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>row4</th>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row3</th>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      columns1  columns2  columns3\n",
       "row4         4         8        81\n",
       "row3         3         6        27\n",
       "row2         2         4         9\n",
       "row1         1         2         3"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 重新索引：.reindex()能够改变或重排Series和DataFrame索引\n",
    "\n",
    "d = d.reindex(index = ['row4','row3','row2','row1'])\n",
    "\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>columns3</th>\n",
       "      <th>columns2</th>\n",
       "      <th>columns1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>row4</th>\n",
       "      <td>81</td>\n",
       "      <td>8</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row3</th>\n",
       "      <td>27</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row2</th>\n",
       "      <td>9</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row1</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      columns3  columns2  columns1\n",
       "row4        81         8         4\n",
       "row3        27         6         3\n",
       "row2         9         4         2\n",
       "row1         3         2         1"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d = d.reindex(columns = ['columns3','columns2','columns1'])\n",
    "d"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### .reindex()的完整参数表\n",
    "![avatar](./image/day4-img3.PNG)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>columns1</th>\n",
       "      <th>columns2</th>\n",
       "      <th>columns3</th>\n",
       "      <th>new-column</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>row1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row3</th>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>27</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row4</th>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>81</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      columns1  columns2  columns3  new-column\n",
       "row1         1         2         3          -1\n",
       "row2         2         4         9          -1\n",
       "row3         3         6        27          -1\n",
       "row4         4         8        81          -1"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fill_value参数\n",
    "data = {\n",
    "    'columns1':[1,2,3,4],\n",
    "    'columns2':[2,4,6,8],\n",
    "    'columns3':[3,9,27,81]\n",
    "}\n",
    "\n",
    "d = pd.DataFrame(data,index = ['row1','row2','row3','row4'])\n",
    "\n",
    "newcol = d.columns.insert(3,'new-column')\n",
    "\n",
    "d = d.reindex(columns = newcol,fill_value = -1)\n",
    "d #空缺值填充为 -1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['row1', 'row2', 'row3', 'row4'], dtype='object')"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series和DataFrame的索引是【Index类型】\n",
    "# Index对象是不可修改类型\n",
    "d.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Index(['index1', 'index2', 'index3', 'index4'], dtype='object'),\n",
       " Index(['index1', 'index2', 'index5', 'index6'], dtype='object'))"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#【index类型】的基本操作\n",
    "# .append(idx) 连接另一个Index对象，产生新的Index对象【类似于<做加法>】\n",
    "\n",
    "data = [1,3,5,7]\n",
    "index1 = ['index1','index2','index3','index4']\n",
    "index2 = ['index1','index2','index5','index6']\n",
    "\n",
    "a = pd.Series(data,index1)\n",
    "b = pd.Series(data,index2)\n",
    "\n",
    "Index_a = a.index\n",
    "Index_b = b.index\n",
    "# 首先创建 2 个Index对象\n",
    "Index_a,Index_b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['index1', 'index2', 'index3', 'index4', 'index1', 'index2', 'index5',\n",
       "       'index6'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Index_a.append(Index_b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# .diff(idx) 计算差集，产生新的Index对象【类似于减法】\n",
    "# 但是可能是因为Pandas版本问题，这个方法不能使用了\n",
    "# 这个问题仍然在查证中\n",
    "# Index_a.diff(Index_b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Index(['index1', 'index2'], dtype='object'),\n",
       " Index(['index1', 'index2', 'index3', 'index4', 'index5', 'index6'], dtype='object'))"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .intersection(idx) 计算交集\n",
    "# .union(idx) 计算并集\n",
    "\n",
    "Index_a.intersection(Index_b),Index_a.union(Index_b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['index1', 'index2', 'index3', 'index4'], dtype='object')\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['index1', 'index3', 'index4'], dtype='object')"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .delete(loc) 删除loc位置处的元素\n",
    "# .insert(loc,e) 在loc位置增加一个元素e\n",
    "print(Index_a)\n",
    "Index_a.delete(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Index(['index1', 'index2', 'index5', 'index6'], dtype='object'),\n",
       " Index(['index1', 'index2', 'index5', 'index6', 'newindex'], dtype='object'))"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Index_b,Index_b.insert(4,'newindex')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(index1    1\n",
       " index2    3\n",
       " index3    5\n",
       " index4    7\n",
       " dtype: int64,\n",
       " index1    1\n",
       " index3    5\n",
       " dtype: int64)"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .drop()能够【删除】Series和DataFrame指定行或列索引\n",
    "data = [1,3,5,7]\n",
    "index1 = ['index1','index2','index3','index4']\n",
    "\n",
    "a = pd.Series(data,index1)\n",
    "a , a.drop(['index2','index4'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>columns1</th>\n",
       "      <th>columns2</th>\n",
       "      <th>columns3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>row1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row3</th>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row4</th>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      columns1  columns2  columns3\n",
       "row1         1         2         3\n",
       "row2         2         4         9\n",
       "row3         3         6        27\n",
       "row4         4         8        81"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = {\n",
    "    'columns1':[1,2,3,4],\n",
    "    'columns2':[2,4,6,8],\n",
    "    'columns3':[3,9,27,81]\n",
    "}\n",
    "\n",
    "d = pd.DataFrame(data,index = ['row1','row2','row3','row4'])\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>columns1</th>\n",
       "      <th>columns2</th>\n",
       "      <th>columns3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>row2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row3</th>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row4</th>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      columns1  columns2  columns3\n",
       "row2         2         4         9\n",
       "row3         3         6        27\n",
       "row4         4         8        81"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d.drop('row1',axis = 0)# axis默认就是0，表示删除index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>columns1</th>\n",
       "      <th>columns3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>row1</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row2</th>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row3</th>\n",
       "      <td>3</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row4</th>\n",
       "      <td>4</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      columns1  columns3\n",
       "row1         1         3\n",
       "row2         2         9\n",
       "row3         3        27\n",
       "row4         4        81"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d.drop('columns2',axis = 1)# axis是1，表示删除column"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "# 第3节：数据类型计算\n",
    "\n",
    "### **规则**\n",
    "+ 算术运算根据行列索引，补齐后运算，运算默认产生浮点数\n",
    "+ 补齐时缺项填充NaN (空值)\n",
    "+ 二维和一维、一维和零维间为广播运算\n",
    "+ 采用$+-*/$符号进行的二元运算产生新的对象"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(   0  1   2   3\n",
       " 0  0  1   2   3\n",
       " 1  4  5   6   7\n",
       " 2  8  9  10  11,\n",
       "     0   1   2   3   4\n",
       " 0   0   1   2   3   4\n",
       " 1   5   6   7   8   9\n",
       " 2  10  11  12  13  14\n",
       " 3  15  16  17  18  19,\n",
       "       0     1     2     3   4\n",
       " 0   0.0   2.0   4.0   6.0 NaN\n",
       " 1   9.0  11.0  13.0  15.0 NaN\n",
       " 2  18.0  20.0  22.0  24.0 NaN\n",
       " 3   NaN   NaN   NaN   NaN NaN,\n",
       "       0     1      2      3   4\n",
       " 0   0.0   1.0    4.0    9.0 NaN\n",
       " 1  20.0  30.0   42.0   56.0 NaN\n",
       " 2  80.0  99.0  120.0  143.0 NaN\n",
       " 3   NaN   NaN    NaN    NaN NaN)"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#【例子】\n",
    "a = pd.DataFrame(np.arange(12).reshape(3, 4))\n",
    "\n",
    "b = pd.DataFrame(np.arange(20).reshape(4, 5))\n",
    "\n",
    "a,b,a+b,a*b # 自动补齐NaN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-98.0</td>\n",
       "      <td>-97.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      0     1\n",
       "0   0.0   2.0\n",
       "1 -98.0 -97.0"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 【方法形式的】四则运算\n",
    "\n",
    "# .add(d, **argws) 类型间加法运算，可选参数\n",
    "# .sub(d, **argws) 类型间减法运算，可选参数\n",
    "# .mul(d, **argws) 类型间乘法运算，可选参数\n",
    "# .div(d, **argws) 类型间除法运算，可选参数\n",
    "\n",
    "a = pd.DataFrame(np.arange(2).reshape(1, 2))\n",
    "b = pd.DataFrame(np.arange(4).reshape(2, 2))\n",
    "a.add(b,fill_value = -100) #缺省的NaN用-100代替\n",
    "# 等价于[0,1][-100,-100] + [0,1][2,3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-200.0</td>\n",
       "      <td>-300.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       0      1\n",
       "0    0.0    1.0\n",
       "1 -200.0 -300.0"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.mul(b,fill_value = -100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0  1   2   3\n",
       "0  0  2 NaN NaN\n",
       "1  2  4 NaN NaN\n",
       "2  4  6 NaN NaN\n",
       "3  6  8 NaN NaN"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 广播运算是指两个数组进行运算，一个是Series数组（即一维数组），另一个是DataFrame数组（多维数组）\n",
    "# 不同维度间为广播运算，一维Series【默认】在轴 1 参与运算\n",
    "a = pd.Series(np.arange(4))\n",
    "b = pd.DataFrame(np.arange(8).reshape(4,2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0\n",
       "1    1\n",
       "2    2\n",
       "3    3\n",
       "dtype: int32"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0  1\n",
       "0  0  1\n",
       "1  2  3\n",
       "2  4  5\n",
       "3  6  7"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0  1   2   3\n",
       "0  0  2 NaN NaN\n",
       "1  2  4 NaN NaN\n",
       "2  4  6 NaN NaN\n",
       "3  6  8 NaN NaN"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a+b #每一列都对应加上了0，1，2，3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0   1\n",
       "0  0   1\n",
       "1  3   4\n",
       "2  6   7\n",
       "3  9  10"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用运算方法，使得参与轴0的运算\n",
    "b.add(a,axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       0      1\n",
       "0  False   True\n",
       "1   True  False"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 比较运算只能比较相同索引的元素，不进行补齐\n",
    "# 也就是说只能比较【n*m和n*m的表格】或者比较【长度都为n】的Series\n",
    "\n",
    "a = pd.DataFrame(np.arange(4).reshape(2, 2))\n",
    "\n",
    "b = pd.DataFrame(np.array([1,1,2,2]).reshape(2, 2))\n",
    "a==b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     True\n",
       "1    False\n",
       "2     True\n",
       "3    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = pd.Series(np.arange(4))\n",
    "b = pd.Series([-1,1,0,3])\n",
    "a > b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       0      1      2      3\n",
       "0   True   True  False  False\n",
       "1  False  False  False  False"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 二维和一维、一维和零维间为广播运算\n",
    "# 不同维度，广播运算，默认在 1 轴\n",
    "a = pd.Series(np.arange(4))\n",
    "b = pd.DataFrame(np.arange(4).reshape(2, 2))\n",
    "a == b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.1 64-bit",
   "language": "python",
   "name": "python38164bitfe2ebe8b9a4f493e93d81a74c550882b"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
